***Replication File for "Forecasting Migration Movements using Prediction Markets" as of August 7, 2024***

***PARTICIPATION***
import delimited "YOUR_PATH/pm_migration_2020.csv", clear 

*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"

*daily
gen clockstring = time
gen double daily = date(clockstring, "YMDhm")
format daily %td
encode userlogin, gen(user_nr)
encode contract_id, gen(contract_nr)
collapse (count) trade_amount,  by(daily user_nr contract_nr)
*collapse (sum) trade_amount (count) contract_nr, by(user_nr daily)
collapse (sum) trade_amount (count) user_nr, by(daily)
drop if daily > daily("18may2020","YMDhm")
*drop opening day
drop in 1
*fill in zeros
tsset daily
tsfill 
replace trade_amount=0 if trade_amount==.
*replace contract_nr=0 if contract_nr==.
replace user_nr=0 if user_nr==.

*FIGURE 1
*set scheme
set scheme tab1
set scheme white_tableau
twoway (bar trade_amount daily, yaxis(1)) (line user_nr daily, yaxis(2))


***INTERPOLATE THE DATA***

***GERMANY***
import delimited "YOUR_PATH/pm_migration_2020.csv", clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="J92WZK5uABziwrosz"
rename prices contract4
rename v19 contract5
rename v20 contract1
rename v21 contract2
rename v22 contract3

replace contract1 = subinstr(contract1, "hDe2ejSkM35AMyXAL: ", "", .)
replace contract2 = subinstr(contract2, "pAdETm3Sg772HMWoT: ", "", .)
replace contract3 = subinstr(contract3, "pJecSpwSXpz4ZxBP2: ", "", .)
replace contract4 = subinstr(contract4, "4BTFqGahpWbk9po9u: ", "", .)
replace contract5 = subinstr(contract5, "JDHg6foQoeNRbJz9L: ", "", .)

destring contract1, replace
destring contract2, replace
destring contract3, replace
destring contract4, replace
destring contract5, replace

gen clockstring = time

*collapse at the level of the minutes
collapse (first) contract1 contract2 contract3 contract4 contract5,  by(clockstring)

***generate contracts 0, 800, 1k, 1.2k, 1.4k & 2.2k (symmetric max.!)
g contract0=0
egen contract2_=rowtotal(contract1 - contract2)
egen contract3_=rowtotal(contract1 - contract3)
egen contract4_=rowtotal(contract1 - contract4)
egen contract5_=rowtotal(contract1 - contract5)

drop contract2 contract3 contract4 contract5
ren contract1 contract800
ren contract2_ contract1000
ren contract3_ contract1200
ren contract4_ contract1400
ren contract5_ contract2200

*reshape the values so 
list
reshape long contract, i(clockstring) j(probability)
set more off

*round two digits
g contract_r = contract*1000
g contract_r2= round(contract_r, 1)

drop contract contract_r
ren contract_r2 contract

*reshape again but using probabilities
 reshape wide probability, i(clockstring) j(contract) 
set more off

 *fill will the probabilities we are looking for
g probability10000=.
g probability50000=.
g probability90000=.

*reshape so we can do the mipolation
 reshape long probability, i(clockstring) j(contract) 
 set more off
 
*set back the digits
g contract_r= contract/1000
drop contract
ren contract_r contract 

*mipolation, go, go
by clockstring: mipolate probability contract, pchip generate(i_probability)
drop probability
ren i_probability probability

*let's keep only the interesting information...
drop if contract <10
drop if contract>10 & contract<50
drop if contract>50 & contract<90
drop if contract>90

*...so we can then reshape properly (last time I reshape, promise)
 reshape wide probability, i(clockstring) j(contract) 

*reverse the scale if referendum
gen p10=probability90
gen p50=probability50
gen p90=probability10

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) p10 p50 p90, by(daily)
list daily p10 p50 p90

*result
gen result2020=994.819
gen result2019=1345.943
*tsforecasts (see below)
gen drift=1416.137
gen _smooth=1408.482
gen _smooth_lb=1133.242
gen _smooth_ub=1683.721
gen _arima=1430.052
gen _arima_lb=1123.144
gen _arima_ub=1736.960

gen country="Germany"
gen issue="immigration"

save "Germany_immigration2020", replace


****************************************************************
***SPAIN***
import delimited "YOUR_PATH/pm_migration_2020.csv", clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="ZRuSzJPPTYXtxP4zT"
rename prices contract5
rename v19 contract4
rename v20 contract1
rename v21 contract3
rename v22 contract2

replace contract1 = subinstr(contract1, "ff4t3QBp7nQqJtCsQ: ", "", .)
replace contract2 = subinstr(contract2, "vkf85qJoQXzSkuqqo: ", "", .)
replace contract3 = subinstr(contract3, "h29SFxqtZxGDfpccP: ", "", .)
replace contract4 = subinstr(contract4, "FJzxvTHe6FcAZNfoB: ", "", .)
replace contract5 = subinstr(contract5, "943miiXyzFnr456T8: ", "", .)

destring contract1, replace
destring contract2, replace
destring contract3, replace
destring contract4, replace
destring contract5, replace

gen clockstring = time

*collapse at the level of the minutes
collapse (first) contract1 contract2 contract3 contract4 contract5,  by(clockstring)

***generate contracts 0, 400, 500, 600, 700 & 1100 (symmetric max.!)
g contract0=0
egen contract2_=rowtotal(contract1 - contract2)
egen contract3_=rowtotal(contract1 - contract3)
egen contract4_=rowtotal(contract1 - contract4)
egen contract5_=rowtotal(contract1 - contract5)

drop contract2 contract3 contract4 contract5
ren contract1 contract400
ren contract2_ contract500
ren contract3_ contract600
ren contract4_ contract700
ren contract5_ contract1100

*reshape the values so 
list
reshape long contract, i(clockstring) j(probability)
set more off

*round two digits
g contract_r = contract*1000
g contract_r2= round(contract_r, 1)

drop contract contract_r
ren contract_r2 contract

*reshape again but using probabilities
 reshape wide probability, i(clockstring) j(contract) 
set more off

 *fill will the probabilities we are looking for
g probability10000=.
g probability50000=.
g probability90000=.

*reshape so we can do the mipolation
 reshape long probability, i(clockstring) j(contract) 
 set more off
 
*set back the digits
g contract_r= contract/1000
drop contract
ren contract_r contract 

*mipolation, go, go
by clockstring: mipolate probability contract, pchip generate(i_probability)
drop probability
ren i_probability probability

*let's keep only the interesting information...

drop if contract <10
drop if contract>10 & contract<50
drop if contract>50 & contract<90
drop if contract>90

*...so we can then reshape properly (last time I reshape, promise)
 reshape wide probability, i(clockstring) j(contract) 

*reverse the scale if referendum
gen p10=probability90
gen p50=probability50
gen p90=probability10

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) p10 p50 p90, by(daily)
list daily p10 p50 p90

***Comparison with result
*https://www.ine.es/consul/serie.do?d=true&s=EM51131 : 219,483+246,238=465,721
gen result2020=415.150
gen result2019=666.022
*tsforecasts (see below)
gen drift=674.9902
gen _smooth=533.6156
gen _smooth_lb=493.6785	
gen _smooth_ub=573.5526
gen _arima=763.3158
gen _arima_lb=650.6333
gen _arima_ub=875.9984

gen country="Spain"
gen issue="immigration"

save "Spain_immigration2020", replace


****************************************************************
***SWITZERLAND***
import delimited "YOUR_PATH/pm_migration_2020.csv", clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="SPMe8k7ZsK6pcXEQe"
rename prices contract5
rename v19 contract2
rename v20 contract1
rename v21 contract4
rename v22 contract3

replace contract1 = subinstr(contract1, "bA3C4RbGp4qyxNd8r: ", "", .)
replace contract2 = subinstr(contract2, "a8KyZ6vGhmwWDy49w: ", "", .)
replace contract3 = subinstr(contract3, "wcSktGY9a9osHvkER: ", "", .)
replace contract4 = subinstr(contract4, "sCuazYT8DK6Whs4vT: ", "", .)
replace contract5 = subinstr(contract5, "GHwEzPxo3P5cwvMte: ", "", .)

destring contract1, replace
destring contract2, replace
destring contract3, replace
destring contract4, replace
destring contract5, replace

gen clockstring = time

*collapse at the level of the minutes
collapse (first) contract1 contract2 contract3 contract4 contract5,  by(clockstring)

***generate contracts 0, 60, 80, 100, 120 & 180 (symmetric max.!)
g contract0=0
egen contract2_=rowtotal(contract1 - contract2)
egen contract3_=rowtotal(contract1 - contract3)
egen contract4_=rowtotal(contract1 - contract4)
egen contract5_=rowtotal(contract1 - contract5)

drop contract2 contract3 contract4 contract5
ren contract1 contract60
ren contract2_ contract80
ren contract3_ contract100
ren contract4_ contract120
ren contract5_ contract180

*reshape the values so 
list
reshape long contract, i(clockstring) j(probability)
set more off

*round two digits
g contract_r = contract*1000
g contract_r2= round(contract_r, 1)

drop contract contract_r
ren contract_r2 contract

*reshape again but using probabilities
 reshape wide probability, i(clockstring) j(contract) 
set more off

 *fill will the probabilities we are looking for
g probability10000=.
g probability50000=.
g probability90000=.

*reshape so we can do the mipolation
 reshape long probability, i(clockstring) j(contract) 
 set more off
 
*set back the digits
g contract_r= contract/1000
drop contract
ren contract_r contract 

*mipolation, go, go
by clockstring: mipolate probability contract, pchip generate(i_probability)
drop probability
ren i_probability probability

*let's keep only the interesting information...

drop if contract <10
drop if contract>10 & contract<50
drop if contract>50 & contract<90
drop if contract>90

*...so we can then reshape properly (last time I reshape, promise)
 reshape wide probability, i(clockstring) j(contract) 

*reverse the scale if referendum
gen p10=probability90
gen p50=probability50
gen p90=probability10

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) p10 p50 p90, by(daily)
list daily p10 p50 p90

***Comparison with result "Effektive Zuwanderung"
*https://www.sem.admin.ch/sem/de/home/publiservice/statistik/auslaenderstatistik/monitor.html
gen result2020=106.231
gen result2019=107.387
*tsforecasts (see below)
gen drift=106.2599
gen _smooth=107.8192
gen _smooth_lb=105.9507
gen _smooth_ub=109.6877
gen _arima=107.1514
gen _arima_lb=105.8962
gen _arima_ub=108.4066

gen country="Switzerland"
gen issue="immigration"

save "Switzerland_immigration2020", replace


****************************************************************
***GERMANY ASYLUM***
import delimited "YOUR_PATH/pm_migration_2020.csv", clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="ijQhBTyzYf4mB3JSC"
rename prices contract1
rename v19 contract4
rename v20 contract2
rename v21 contract5
rename v22 contract3

replace contract1 = subinstr(contract1, "7eeqQd6FTDZbyKzzQ: ", "", .)
replace contract2 = subinstr(contract2, "Xe8Pv9SeepPD7idWC: ", "", .)
replace contract3 = subinstr(contract3, "obhyDsqW79WkyaGaf: ", "", .)
replace contract4 = subinstr(contract4, "C9vuZYEMtfiza8pco: ", "", .)
replace contract5 = subinstr(contract5, "jf7MyRXm4Aaro4Ab8: ", "", .)

destring contract1, replace
destring contract2, replace
destring contract3, replace
destring contract4, replace
destring contract5, replace

gen clockstring = time

*collapse at the level of the minutes
collapse (first) contract1 contract2 contract3 contract4 contract5,  by(clockstring)

***generate contracts 0, 100, 125, 150, 175 & 275 (symmetric max.!)
g contract0=0
egen contract2_=rowtotal(contract1 - contract2)
egen contract3_=rowtotal(contract1 - contract3)
egen contract4_=rowtotal(contract1 - contract4)
egen contract5_=rowtotal(contract1 - contract5)

drop contract2 contract3 contract4 contract5
ren contract1 contract100
ren contract2_ contract125
ren contract3_ contract150
ren contract4_ contract175
ren contract5_ contract275

*reshape the values so 
list
reshape long contract, i(clockstring) j(probability)
set more off

*round two digits
g contract_r = contract*1000
g contract_r2= round(contract_r, 1)

drop contract contract_r
ren contract_r2 contract

*reshape again but using probabilities
 reshape wide probability, i(clockstring) j(contract) 
set more off

 *fill will the probabilities we are looking for
g probability10000=.
g probability50000=.
g probability90000=.

*reshape so we can do the mipolation
 reshape long probability, i(clockstring) j(contract) 
 set more off
 
*set back the digits
g contract_r= contract/1000
drop contract
ren contract_r contract 

*mipolation, go, go
by clockstring: mipolate probability contract, pchip generate(i_probability)
drop probability
ren i_probability probability

*let's keep only the interesting information...

drop if contract <10
drop if contract>10 & contract<50
drop if contract>50 & contract<90
drop if contract>90

*...so we can then reshape properly (last time I reshape, promise)
 reshape wide probability, i(clockstring) j(contract) 

*reverse the scale if referendum
gen p10=probability90
gen p50=probability50
gen p90=probability10

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) p10 p50 p90, by(daily)
list daily p10 p50 p90

***Comparison with result
*https://ec.europa.eu/eurostat/databrowser/view/tps00191/default/table?lang=en
gen result2019=142.450
gen result2020=103
*tsforecasts (see below)
gen drift=153.695
gen _smooth=218.8277
gen _smooth_lb=139.6094
gen _smooth_ub=298.046
gen _arima=151.5971
gen _arima_ub=37.19041
gen _arima_lb=266.0039

gen country="Germany"
gen issue="asylum"

save "Germany_asylum2020", replace


****************************************************************
***SPAIN ASYLUM***
import delimited "YOUR_PATH/pm_migration_2020.csv", clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="Q2nyGXgeChzwLJXff"
rename prices contract5
rename v19 contract3
rename v20 contract4
rename v21 contract1
rename v22 contract2

replace contract1 = subinstr(contract1, "tYDf8twSdG2wNNAr2: ", "", .)
replace contract2 = subinstr(contract2, "vPmkP2xmwS6mjMZ7Q: ", "", .)
replace contract3 = subinstr(contract3, "EoFwjoertA63tbLFu: ", "", .)
replace contract4 = subinstr(contract4, "oSmjB4g6jiugwYSSt: ", "", .)
replace contract5 = subinstr(contract5, "9WRrdWGE5nsG4DmXW: ", "", .)

destring contract1, replace
destring contract2, replace
destring contract3, replace
destring contract4, replace
destring contract5, replace

gen clockstring = time

*collapse at the level of the minutes
collapse (first) contract1 contract2 contract3 contract4 contract5,  by(clockstring)

***generate contracts 0, 60, 80, 100, 120 & 180 (symmetric max.!)
g contract0=0
egen contract2_=rowtotal(contract1 - contract2)
egen contract3_=rowtotal(contract1 - contract3)
egen contract4_=rowtotal(contract1 - contract4)
egen contract5_=rowtotal(contract1 - contract5)

drop contract2 contract3 contract4 contract5
ren contract1 contract60
ren contract2_ contract80
ren contract3_ contract100
ren contract4_ contract120
ren contract5_ contract180

*reshape the values so 
list
reshape long contract, i(clockstring) j(probability)
set more off

*round two digits
g contract_r = contract*1000
g contract_r2= round(contract_r, 1)

drop contract contract_r
ren contract_r2 contract

*reshape again but using probabilities
 reshape wide probability, i(clockstring) j(contract) 
set more off

 *fill will the probabilities we are looking for
g probability10000=.
g probability50000=.
g probability90000=.

*reshape so we can do the mipolation
 reshape long probability, i(clockstring) j(contract) 
 set more off
 
*set back the digits
g contract_r= contract/1000
drop contract
ren contract_r contract 

*mipolation, go, go
by clockstring: mipolate probability contract, pchip generate(i_probability)
drop probability
ren i_probability probability

*let's keep only the interesting information...
drop if contract <10
drop if contract>10 & contract<50
drop if contract>50 & contract<90
drop if contract>90

*...so we can then reshape properly (last time I reshape, promise)
 reshape wide probability, i(clockstring) j(contract) 

*reverse the scale if referendum
gen p10=probability90
gen p50=probability50
gen p90=probability10

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) p10 p50 p90, by(daily)
list daily p10 p50 p90

***Comparison with result
*https://ec.europa.eu/eurostat/databrowser/view/tps00191/default/table?lang=engen result2019=143
gen result2020=86.4
gen result2019=115.2
*tsforecasts (see below)
gen drift=127.68889
gen _smooth=65.912
gen _smooth_lb=53.80961
gen _smooth_ub=78.01438
gen _arima=164.7912
gen _arima_ub=192.7188
gen _arima_lb=136.8636

gen country="Spain"
gen issue="asylum"

save "Spain_asylum2020", replace


****************************************************************
***SWITZERLAND ASYLUM***
import delimited "YOUR_PATH/pm_migration_2020.csv", clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="qs8eLMhZM39TvqZ5g"
rename prices contract5
rename v19 contract2
rename v20 contract4
rename v21 contract1
rename v22 contract3

replace contract1 = subinstr(contract1, "q3uRosdF6cCzkASbR: ", "", .)
replace contract2 = subinstr(contract2, "CktadqZKd62FQfx6J: ", "", .)
replace contract3 = subinstr(contract3, "svM9KKL3LtzG7aJiR: ", "", .)
replace contract4 = subinstr(contract4, "i24RTFkwLhaDkhBxQ: ", "", .)
replace contract5 = subinstr(contract5, "2CpNoiFN4kFqFidfi: ", "", .)

destring contract1, replace
destring contract2, replace
destring contract3, replace
destring contract4, replace
destring contract5, replace

gen clockstring = time


*collapse at the level of the minutes
collapse (first) contract1 contract2 contract3 contract4 contract5,  by(clockstring)

***generate contracts 0, 10, 12.5, 15, 17.5 & 27.5 (symmetric max.!)
g contract0=0
egen contract2_=rowtotal(contract1 - contract2)
egen contract3_=rowtotal(contract1 - contract3)
egen contract4_=rowtotal(contract1 - contract4)
egen contract5_=rowtotal(contract1 - contract5)

drop contract2 contract3 contract4 contract5
ren contract1 contract10000
ren contract2_ contract12500
ren contract3_ contract15000
ren contract4_ contract17500
ren contract5_ contract27500

*reshape the values so 
list
reshape long contract, i(clockstring) j(probability)
set more off

*round two digits
g contract_r = contract*1000
g contract_r2= round(contract_r, 1)

drop contract contract_r
ren contract_r2 contract

*reshape again but using probabilities
 reshape wide probability, i(clockstring) j(contract) 
set more off

 *fill will the probabilities we are looking for
g probability10000=.
g probability50000=.
g probability90000=.

*reshape so we can do the mipolation
 reshape long probability, i(clockstring) j(contract) 
 set more off
 
*set back the digits
g contract_r= contract/1000
drop contract
ren contract_r contract 

*mipolation, go, go
by clockstring: mipolate probability contract, pchip generate(i_probability)
drop probability
ren i_probability probability

*let's keep only the interesting information...

drop if contract <10
drop if contract>10 & contract<50
drop if contract>50 & contract<90
drop if contract>90

*...so we can then reshape properly (last time I reshape, promise)
 reshape wide probability, i(clockstring) j(contract) 

*reverse the scale if referendum
gen p10=probability90
gen p50=probability50
gen p90=probability10

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) p10 p50 p90, by(daily)
list daily p10 p50 p90

***Comparison with result
*https://www.sem.admin.ch/sem/de/home/publiservice/statistik/asylstatistik/archiv/2020.html
gen result2020=11.041
gen result2019=14.269
*tsforecasts (see below)
gen drift=14.12478
gen _smooth=18.18308
gen _smooth_lb=16.3146
gen _smooth_ub=20.05155
gen _arima=14.13033
gen _arima_ub=17.95629
gen _arima_lb=10.30436

gen country="Switzerland"
gen issue="asylum"

save "Switzerland_asylum2020", replace


****************************************************************
***UK ASYLUM***
import delimited "YOUR_PATH/pm_migration_2020.csv", clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="pSTKTEsfpvWf43CQa"
rename prices contract3
rename v19 contract2
rename v20 contract5
rename v21 contract1
rename v22 contract4

replace contract1 = subinstr(contract1, "bMur5N7fsbcBky8hm: ", "", .)
replace contract2 = subinstr(contract2, "EgtbWKWvdZT7kyYeY: ", "", .)
replace contract3 = subinstr(contract3, "AE8MzEQ42iwrbqEAp: ", "", .)
replace contract4 = subinstr(contract4, "jjQ2gaPXcMsZxLDyQ: ", "", .)
replace contract5 = subinstr(contract5, "T9Lw6mHndTHfFsRiS: ", "", .)

destring contract1, replace
destring contract2, replace
destring contract3, replace
destring contract4, replace
destring contract5, replace

gen clockstring = time

*collapse at the level of the minutes
collapse (first) contract1 contract2 contract3 contract4 contract5,  by(clockstring)

***generate contracts 0, 30, 35, 40, 45 & 75 (symmetric max.!)
g contract0=0
egen contract2_=rowtotal(contract1 - contract2)
egen contract3_=rowtotal(contract1 - contract3)
egen contract4_=rowtotal(contract1 - contract4)
egen contract5_=rowtotal(contract1 - contract5)

drop contract2 contract3 contract4 contract5
ren contract1 contract30
ren contract2_ contract35
ren contract3_ contract40
ren contract4_ contract45
ren contract5_ contract75

*reshape the values so 
list
reshape long contract, i(clockstring) j(probability)
set more off

*round two digits
g contract_r = contract*1000
g contract_r2= round(contract_r, 1)

drop contract contract_r
ren contract_r2 contract

*reshape again but using probabilities
 reshape wide probability, i(clockstring) j(contract) 
set more off

 *fill will the probabilities we are looking for
g probability10000=.
g probability50000=.
g probability90000=.

*reshape so we can do the mipolation
 reshape long probability, i(clockstring) j(contract) 
 set more off
 
*set back the digits
g contract_r= contract/1000
drop contract
ren contract_r contract 

*mipolation, go, go
by clockstring: mipolate probability contract, pchip generate(i_probability)
drop probability
ren i_probability probability

*let's keep only the interesting information...
drop if contract <10
drop if contract>10 & contract<50
drop if contract>50 & contract<90
drop if contract>90

*...so we can then reshape properly (last time I reshape, promise)
 reshape wide probability, i(clockstring) j(contract) 

*reverse the scale if referendum
gen p10=probability90
gen p50=probability50
gen p90=probability10

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) p10 p50 p90, by(daily)
list daily p10 p50 p90

***Comparison with result
*Table in dropbox folder
gen result2019=35.737
gen result2020=29
*tsforecasts (see below)
gen drift=37.71711
gen _smooth=31.35188
gen _smooth_lb=28.53055	
gen _smooth_ub=34.1732
gen _arima=36.34441
gen _arima_ub=40.0566
gen _arima_lb=32.63222

gen country="UK"
gen issue="asylum"

save "UK_asylum2020", replace


****************************************************************
***Germany ASYLUM Groups***
import delimited "YOUR_PATH/pm_migration_2020.csv", clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="SMR5DNa8MGyPPRB8N"
rename prices Irak
rename v19 Turkey
rename v20 Iran
rename v21 Syria
rename v22 Other
rename v23 Afghanistan

replace Syria = subinstr(Syria, "PW4RYTo2ywjiRTXJ9: ", "", .)
replace Irak = subinstr(Irak, "3C5pm5xDhEy6AoA8q: ", "", .)
replace Afghanistan = subinstr(Afghanistan, "pruas9QMe7L6NecDd: ", "", .)
replace Turkey = subinstr(Turkey, "9kaz6WaKbBAxg5toj: ", "", .)
replace Iran = subinstr(Iran, "MyTRab7sSJxEhREj8: ", "", .)
replace Other = subinstr(Other, "gpBBt84xcSBH6R6d4: ", "", .)

destring Syria, replace
destring Irak, replace
destring Afghanistan, replace
destring Turkey, replace
destring Iran, replace
destring Other, replace

gen clockstring = time

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) Syria Irak Afghanistan Turkey Iran Other, by(daily)

***Comparison with result
*https://ec.europa.eu/eurostat/statistics-explained/index.php?title=File:Table_1_Five_main_citizenships_of_first-time_asylum_applicants_(non-EU_citizens),_2020_(number,_rounded_figures)_v2.png
gen Syria_abs=36435
gen Irak_abs=9845
gen Afghanistan_abs=9900
gen Turkey_abs=5780
gen Iran_abs=3120

gen country="Germany"
gen issue="asgroups"

save "Germany_asgroups2020", replace


****************************************************************
***Spain ASYLUM Groups***
import delimited "YOUR_PATH/pm_migration_2020.csv", clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="PgsodWj3FRje9YnGG"
rename prices Honduras
rename v19 Nicaragua
rename v20 Venezuela
rename v21 ElSalvador
rename v22 Other
rename v23 Colombia

replace Venezuela = subinstr(Venezuela, "JWcxKGRqFRasoxcSH: ", "", .)
replace Colombia = subinstr(Colombia, "vv637NZH27pQFktpD: ", "", .)
replace Honduras = subinstr(Honduras, "DMPbDr2eXreKwrA97: ", "", .)
replace Nicaragua = subinstr(Nicaragua, "DcWdjF3FodCgdeyqF: ", "", .)
replace ElSalvador = subinstr(ElSalvador, "MNXTRjTAyHF4baZaq: ", "", .)
replace Other = subinstr(Other, "Qv9A3k3gaKMbfYPgk: ", "", .)

destring Venezuela, replace
destring Colombia, replace
destring Honduras, replace
destring Nicaragua, replace
destring ElSalvador, replace
destring Other, replace

gen clockstring = time

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) Venezuela Colombia Honduras Nicaragua ElSalvador Other, by(daily)

***Comparison with result
*https://ec.europa.eu/eurostat/statistics-explained/index.php?title=File:Table_1_Five_main_citizenships_of_first-time_asylum_applicants_(non-EU_citizens),_2020_(number,_rounded_figures)_v2.png
gen Venezuela_abs=28065
gen Colombia_abs=27180
gen Honduras_abs=5465
gen Nicaragua_abs=3680
gen ElSalvador_abs=2475

gen country="Spain"
gen issue="asgroups"

save "Spain_asgroups2020", replace


****************************************************************
***Switzerland ASYLUM Groups***
import delimited "YOUR_PATH/pm_migration_2020.csv", clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="5MikQ957MPj5Xok48"
rename prices Irak
rename v19 Afghanistan
rename v20 Algeria
rename v21 Syria
rename v22 Other
rename v23 Eritrea

replace Syria = subinstr(Syria, "mBXmEWgaX3rgYCQMS: ", "", .)
replace Irak = subinstr(Irak, "4SC6e3fT2aHQTEC9p: ", "", .)
replace Afghanistan = subinstr(Afghanistan, "bt2TPvrGZutrQrhnt: ", "", .)
replace Eritrea = subinstr(Eritrea, "w5roE8HE2juzEgACB: ", "", .)
replace Algeria = subinstr(Algeria, "g72cJ8Ga9Ga9vWM8p: ", "", .)
replace Other = subinstr(Other, "p2AZRZPWQWjNDSLXz: ", "", .)

destring Syria, replace
destring Irak, replace
destring Afghanistan, replace
destring Eritrea, replace
destring Algeria, replace
destring Other, replace

gen clockstring = time

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) Syria Irak Afghanistan Eritrea Algeria Other, by(daily)

***Comparison with result
*https://ec.europa.eu/eurostat/statistics-explained/index.php?title=File:Table_1_Five_main_citizenships_of_first-time_asylum_applicants_(non-EU_citizens),_2020_(number,_rounded_figures)_v2.png
gen Syria_abs=755
gen Irak_abs=270
gen Afghanistan_abs=1630
gen Eritrea_abs=1635
gen Algeria_abs=935

gen country="Switzerland"
gen issue="asgroups"

save "Switzerland_asgroups2020", replace


****************************************************************
***UK ASYLUM Groups***
import delimited "YOUR_PATH/pm_migration_2020.csv", clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="9KeMqZhmfb4vsjaBa"
rename prices Other
rename v19 Pakistan
rename v20 Iran
rename v21 Albania
rename v22 Eritrea
rename v23 Irak

replace Pakistan = subinstr(Pakistan, "N4q88vnf6ha5pQNeJ: ", "", .)
replace Irak = subinstr(Irak, "uhTyAPqsYryCoS89o: ", "", .)
replace Albania = subinstr(Albania, "ij9FsFiDCyiboqauT: ", "", .)
replace Eritrea = subinstr(Eritrea, "ndsX2oPnohNsqqBzp: ", "", .)
replace Iran = subinstr(Iran, "i6MR9oqacDhuywFWh: ", "", .)
replace Other = subinstr(Other, "JfQ3FaHeDrekEEnmW: ", "", .)

destring Pakistan, replace
destring Irak, replace
destring Albania, replace
destring Eritrea, replace
destring Iran, replace
destring Other, replace

gen clockstring = time

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) Pakistan Irak Albania Eritrea Iran Other, by(daily)
tsset daily
tsline Pakistan Irak Albania Eritrea Iran Other

***Comparison with result
*https://ec.europa.eu/eurostat/statistics-explained/index.php?title=File:Table_1_Five_main_citizenships_of_first-time_asylum_applicants_(non-EU_citizens),_2020_(number,_rounded_figures)_v2.png
gen Pakistan_abs=1205
gen Irak_abs=2304
gen Albania_abs=2784
gen Eritrea_abs=2496
gen Iran_abs=3847

gen country="UK"
gen issue="asgroups"

save "UK_asgroups2020", replace


**********TIME SERIES FORECASTS FOR IMMIGRATION**********
import excel "/Users/admin/Dropbox/Sampling_2020JanuaryFebruary/immigration_ts.xlsx", sheet("Tabelle1") firstrow clear

gen imm_ch_2020=imm_ch if year==2020
gen imm_de_2020=imm_de if year==2020
gen imm_es_2020=imm_es if year==2020

replace imm_ch=. if year==2020
replace imm_de=. if year==2020
replace imm_es=. if year==2020

gen block=3
replace block=2 if year<2018
replace block=1 if year<2015

tsset year

*TIME SERIES METHOD?
twoway connected imm_ch year
twoway connected imm_de year
twoway connected imm_es year

*SIMPLE FORECASTS
*gen naive
gen naive_imm_ch=l.imm_ch
gen naive_imm_de=l.imm_de
gen naive_imm_es=l.imm_es
*drift(?)=yt-1+c, where c=(yT−yt-1)/(T−1)
gen c=(106231-118629)/(2019-2008)
gen drift_imm_ch=l.imm_ch+c
gen d=(1345943-573815)/(2019-2008)
gen drift_imm_de=l.imm_de+d
gen e=(666022-567372)/(2019-2008)
gen drift_imm_es=l.imm_es+e
*simple exponential smoothing
tssmooth exponential smooth_imm_ch=imm_ch, parms(.4) forecast(1)
save immigration_imm_a1, replace
// Bootstrap the standard error of the mean
bootstrap r(mean), reps(1000) seed(123456) cluster(block) saving(bs_results, replace): summarize smooth_imm_ch
// Load the bootstrap results
use bs_results, clear
// Calculate the mean and standard error from bootstrap results
egen bootstrap_mean = mean(_bs_1)
egen bootstrap_se = sd(_bs_1)
summ bootstrap_mean bootstrap_se
// Save the results back into the original dataset (I am unable to do this with code, sorry...)
use immigration_imm_a1.dta, clear
gen b_se_boots_imm_ch = 1457.47
gen lb_boots_imm_ch = smooth_imm_ch - 1.282*b_se_boots_imm_ch
gen ub_boots_imm_ch = smooth_imm_ch + 1.282*b_se_boots_imm_ch

tssmooth exponential smooth_imm_de=imm_de, parms(.4) forecast(1)
save immigration_imm_a2, replace
// Bootstrap the standard error of the mean
bootstrap r(mean), reps(1000) seed(123456) cluster(block) saving(bs_results, replace): summarize smooth_imm_de
// Load the bootstrap results
use bs_results, clear
// Calculate the mean and standard error from bootstrap results
egen bootstrap_mean = mean(_bs_1)
egen bootstrap_se = sd(_bs_1)
summ bootstrap_mean bootstrap_se
// Save the results back into the original dataset (I am unable to do this with code, sorry...)
use immigration_imm_a2.dta, clear
gen b_se_boots_imm_de = 214695.3
gen lb_boots_imm_de = smooth_imm_de - 1.282*b_se_boots_imm_de
gen ub_boots_imm_de = smooth_imm_de + 1.282*b_se_boots_imm_de

tssmooth exponential smooth_imm_es=imm_es, parms(.4) forecast(1)
save immigration_imm_a3, replace
// Bootstrap the standard error of the mean
bootstrap r(mean), reps(1000) seed(123456) cluster(block) saving(bs_results, replace): summarize smooth_imm_es
// Load the bootstrap results
use bs_results, clear
// Calculate the mean and standard error from bootstrap results
egen bootstrap_mean = mean(_bs_1)
egen bootstrap_se = sd(_bs_1)
summ bootstrap_mean bootstrap_se
// Save the results back into the original dataset (I am unable to do this with code, sorry...)
use immigration_imm_a3.dta, clear
gen b_se_boots_imm_es = 31152.13
gen lb_boots_imm_es = smooth_imm_es - 1.282*b_se_boots_imm_es
gen ub_boots_imm_es = smooth_imm_es + 1.282*b_se_boots_imm_es

*CREATE A STANDARD ARIMA MODEL WITH (1,1,1)
arima imm_ch, arima(1,1,1)
predict arima_imm_ch, dynamic(2020) y 
save immigration_ts1, replace
// Bootstrap the standard error of the mean
bootstrap r(mean), reps(1000) seed(123456) cluster(block) saving(bs_results, replace): summarize arima_imm_ch
// Load the bootstrap results
use bs_results, clear
// Calculate the mean and standard error from bootstrap results
egen bootstrap_mean = mean(_bs_1)
egen bootstrap_se = sd(_bs_1)
summ bootstrap_mean bootstrap_se
// Save the 80%(!) CIs back into the original dataset (I am unable to do this with code, sorry...)
use immigration_ts1.dta, clear
gen b_se_arima_imm_ch = 979.1021
gen lb_arima_imm_ch = arima_imm_ch - 1.282*b_se_arima_imm_ch
gen ub_arima_imm_ch = arima_imm_ch + 1.282*b_se_arima_imm_ch
label variable arima_imm_ch arima_imm_ch

arima imm_de, arima(1,1,1)
predict arima_imm_de, dynamic(2020) y
save immigration_ts2, replace
// Bootstrap the standard error of the mean
bootstrap r(mean), reps(1000) seed(123456) cluster(block) saving(bs_results, replace): summarize arima_imm_de
// Load the bootstrap results
use bs_results, clear
// Calculate the mean and standard error from bootstrap results
egen bootstrap_mean = mean(_bs_1)
egen bootstrap_se = sd(_bs_1)
summ bootstrap_mean bootstrap_se
// Save the results back into the original dataset (I am unable to do this with code, sorry...)
use immigration_ts2.dta, clear
gen b_se_arima_imm_de = 239397.6
gen lb_arima_imm_de = arima_imm_de - 1.282*b_se_arima_imm_de
gen ub_arima_imm_de = arima_imm_de + 1.282*b_se_arima_imm_de
label variable arima_imm_de arima_imm_de

arima imm_es, arima(1,1,1)
predict arima_imm_es, dynamic(2020) y
save immigration_ts3, replace
// Bootstrap the standard error of the mean
bootstrap r(mean), reps(1000) seed(123456) cluster(block) saving(bs_results, replace): summarize arima_imm_es
// Load the bootstrap results
use bs_results, clear
// Calculate the mean and standard error from bootstrap results
egen bootstrap_mean = mean(_bs_1)
egen bootstrap_se = sd(_bs_1)
summ bootstrap_mean bootstrap_se
// Save the results back into the original dataset (I am unable to do this with code, sorry...)
use immigration_ts3.dta, clear
gen b_se_arima_imm_es = 87895.91
gen lb_arima_imm_es = arima_imm_es - 1.282*b_se_arima_imm_es
gen ub_arima_imm_es = arima_imm_es + 1.282*b_se_arima_imm_es
label variable arima_imm_es arima_imm_es

*CREATE ALTERNATIVE ARIMA MODELS

*create first and second differences
gen imm_ch_1d=imm_ch-l.imm_ch
gen imm_de_1d=imm_de-l.imm_de
gen imm_es_1d=imm_es-l.imm_es
gen imm_ch_2d=(imm_ch - l1.imm_ch) - (l1.imm_ch - l2.imm_ch)
gen imm_de_2d=(imm_de - l1.imm_de) - (l1.imm_de - l2.imm_de)
gen imm_es_2d=(imm_es - l1.imm_es) - (l1.imm_es - l2.imm_es)
*Inspect corrolelograms
ac imm_ch_1d
ac imm_de_1d
ac imm_es_1d
ac imm_ch_2d
ac imm_de_2d
ac imm_es_2d
*The series are not autocorrelated with their lagged series. Hence, we stick to MA=1

*Check now for partial autocorrelation
pac imm_ch_1d
pac imm_de_1d
pac imm_es_1d
pac imm_ch_2d
pac imm_de_2d
pac imm_es_2d
*the series do not partially auto correlate with their lagged series at lags 1

arima imm_ch, arima(2,1,1)
predict arima_imm_ch_211, dynamic(2020) y
label variable arima_imm_ch arima_imm_ch
arima imm_de, arima(2,1,1)
predict arima_imm_de_211, dynamic(2020) y
label variable arima_imm_de arima_imm_de
arima imm_es, arima(2,1,1)
predict arima_imm_es_211, dynamic(2020) y
label variable arima_imm_es arima_imm_es

arima imm_ch, arima(1,2,1)
predict arima_imm_ch_121, dynamic(2020) y
label variable arima_imm_ch arima_imm_ch
*arima imm_de, arima(1,2,1)
*predict arima_imm_de_121, dynamic(2020) y
*label variable arima_imm_de arima_imm_de
arima imm_es, arima(1,2,1)
predict arima_imm_es_121, dynamic(2020) y
label variable arima_imm_es arima_imm_es

arima imm_ch, arima(1,1,2)
predict arima_imm_ch_112, dynamic(2020) y
label variable arima_imm_ch arima_imm_ch
*arima imm_de, arima(1,1,2)
*predict arima_imm_de_112, dynamic(2020) y
*label variable arima_imm_de arima_imm_de
arima imm_es, arima(1,1,2)
predict arima_imm_es_112, dynamic(2020) y
label variable arima_imm_es arima_imm_es

list arima_imm_ch arima_imm_de arima_imm_es arima_imm_ch_211 arima_imm_de_211 arima_imm_es_211 arima_imm_ch_121 arima_imm_es_121 arima_imm_ch_112 arima_imm_es_112 if year==2020

replace imm_ch=imm_ch_2020 if year==2020
drop imm_ch_2020
replace imm_de=imm_de_2020 if year==2020
drop imm_de_2020
replace imm_es=imm_es_2020 if year==2020
drop imm_es_2020

gen drift_imm_ch_2020=drift_imm_ch if year==2020
gen naive_imm_ch_2020=naive_imm_ch if year==2020
gen smooth_imm_ch_2020=smooth_imm_ch if year==2020
gen arima_imm_ch_2020=arima_imm_ch if year==2020

gen drift_imm_de_2020=drift_imm_de if year==2020
gen naive_imm_de_2020=naive_imm_de if year==2020
gen smooth_imm_de_2020=smooth_imm_de if year==2020
gen arima_imm_de_2020=arima_imm_de if year==2020

gen drift_imm_es_2020=drift_imm_es if year==2020
gen naive_imm_es_2020=naive_imm_es if year==2020
gen smooth_imm_es_2020=smooth_imm_es if year==2020
gen arima_imm_es_2020=arima_imm_es if year==2020

*Market forecasts for May 31 (see ts analysis above)
gen pm_imm_ch=94026.42 if year==2020
gen pm_imm_ch_lb=68641.59 if year==2020
gen pm_imm_ch_ub=117016.5 if year==2020
gen pm_imm_es=564055.4 if year==2020
gen pm_imm_es_lb=410575.7 if year==2020
gen pm_imm_es_ub=729496.4 if year==2020
gen pm_imm_de=1032360 if year==2020
gen pm_imm_de_lb=726963.4 if year==2020
gen pm_imm_de_ub=1305378 if year==2020

twoway (connected imm_ch year) (connected drift_imm_ch_2020 year) (connected naive_imm_ch_2020 year) (connected smooth_imm_ch_2020 year) (connected arima_imm_ch_2020 year) (connected pm_imm_ch year)
twoway (connected imm_es year) (connected drift_imm_es_2020 year) (connected naive_imm_es_2020 year) (connected smooth_imm_es_2020 year) (connected arima_imm_es_2020 year) (connected pm_imm_es year)
twoway (connected imm_de year) (connected drift_imm_de_2020 year) (connected naive_imm_de_2020 year) (connected smooth_imm_de_2020 year) (connected arima_imm_de_2020 year) (connected pm_imm_de year)


**********TIME SERIES FORECASTS FOR ASYLUM APPLICATIONS**********
import excel "/Users/admin/Dropbox/Sampling_2020JanuaryFebruary/asylum_applications_ts.xlsx", sheet("Tabelle1") firstrow clear

gen asyl_ch_2020=Switzerland if year==2020
gen asyl_de_2020=Germany if year==2020
gen asyl_es_2020=Spain if year==2020
gen asyl_uk_2020=UK if year==2020

replace Switzerland=. if year==2020
replace Germany=. if year==2020
replace Spain=. if year==2020
replace UK=. if year==2020

gen block=3
replace block=2 if year<2018
replace block=1 if year<2015

tsset year

*SIMPLE FORECASTS
*gen naive
gen naive_asyl_ch=l.Switzerland
gen naive_asyl_de=l.Germany
gen naive_asyl_es=l.Spain
gen naive_asyl_uk=l.UK
*drift(?)=yt-1+c, where c=(yT−yt-1)/(T−1)
gen c=(14269-15567)/(2019-2010)
gen drift_asyl_ch=l.Switzerland+c
gen d=(142450-41245)/(2019-2010)
gen drift_asyl_de=l.Germany+d
gen e=(115175-2550)/(2019-2010)
gen drift_asyl_es=l.Spain+e
gen f=(35737-17916)/(2019-2010)
gen drift_asyl_uk=l.UK+f
*simple exponential smoothing
tssmooth exponential smooth_asyl_ch=Switzerland, parms(.4) forecast(1)
save immigration_asyl_a, replace
// Bootstrap the standard error of the mean
bootstrap r(mean), reps(1000) seed(123456) cluster(block) saving(bs_results, replace): summarize smooth_asyl_ch
// Load the bootstrap results
use bs_results, clear
// Calculate the mean and standard error from bootstrap results
egen bootstrap_mean = mean(_bs_1)
egen bootstrap_se = sd(_bs_1)
summ bootstrap_mean bootstrap_se
// Save the results back into the original dataset (I am unable to do this with code, sorry...)
use immigration_asyl_a.dta, clear
gen b_se_boots_asyl_ch = 1457.47
gen lb_boots_ch = smooth_asyl_ch - 1.282*b_se_boots_asyl_ch
gen ub_boots_ch = smooth_asyl_ch + 1.282*b_se_boots_asyl_ch

tssmooth exponential smooth_asyl_de=Germany, parms(.4) forecast(1)
save immigration_asyl_b, replace
// Bootstrap the standard error of the mean
bootstrap r(mean), reps(1000) seed(123456) cluster(block) saving(bs_results, replace): summarize smooth_asyl_de
// Load the bootstrap results
use bs_results, clear
// Calculate the mean and standard error from bootstrap results
egen bootstrap_mean = mean(_bs_1)
egen bootstrap_se = sd(_bs_1)
summ bootstrap_mean bootstrap_se
// Save the results back into the original dataset (I am unable to do this with code, sorry...)
use immigration_asyl_b.dta, clear
gen b_se_boots_asyl_de = 61792.75
gen lb_boots_de = smooth_asyl_de - 1.282*b_se_boots_asyl_de
gen ub_boots_de = smooth_asyl_de + 1.282*b_se_boots_asyl_de

tssmooth exponential smooth_asyl_es=Spain, parms(.4) forecast(1)
save immigration_asyl_c, replace
// Bootstrap the standard error of the mean
bootstrap r(mean), reps(1000) seed(123456) cluster(block) saving(bs_results, replace): summarize smooth_asyl_es
// Load the bootstrap results
use bs_results, clear
// Calculate the mean and standard error from bootstrap results
egen bootstrap_mean = mean(_bs_1)
egen bootstrap_se = sd(_bs_1)
summ bootstrap_mean bootstrap_se
// Save the results back into the original dataset (I am unable to do this with code, sorry...)
use immigration_asyl_c.dta, clear
gen b_se_boots_asyl_es = 9440.238
gen lb_boots_es = smooth_asyl_es - 1.282*b_se_boots_asyl_es
gen ub_boots_es = smooth_asyl_es + 1.282*b_se_boots_asyl_es

tssmooth exponential smooth_asyl_uk=UK, parms(.4) forecast(1)
save immigration_asyl_d, replace
// Bootstrap the standard error of the mean
bootstrap r(mean), reps(1000) seed(123456) cluster(block) saving(bs_results, replace): summarize smooth_asyl_uk
// Load the bootstrap results
use bs_results, clear
// Calculate the mean and standard error from bootstrap results
egen bootstrap_mean = mean(_bs_1)
egen bootstrap_se = sd(_bs_1)
summ bootstrap_mean bootstrap_se
// Save the results back into the original dataset (I am unable to do this with code, sorry...)
use immigration_asyl_d.dta, clear
gen b_se_boots_asyl_uk = 2200.718
gen lb_boots_uk = smooth_asyl_uk - 1.282*b_se_boots_asyl_uk
gen ub_boots_uk = smooth_asyl_uk + 1.282*b_se_boots_asyl_uk

*CREATE AN ARIMA MODEL
arima Switzerland, arima(1,1,1)
predict arima_asyl_ch, dynamic(2020) y
save immigration_asyl1, replace
// Bootstrap the standard error of the mean
bootstrap r(mean), reps(1000) seed(123456) cluster(block) saving(bs_results, replace): summarize arima_asyl_ch
// Load the bootstrap results
use bs_results, clear
// Calculate the mean and standard error from bootstrap results
egen bootstrap_mean = mean(_bs_1)
egen bootstrap_se = sd(_bs_1)
summ bootstrap_mean bootstrap_se
// Save the results back into the original dataset (I am unable to do this with code, sorry...)
use immigration_asyl1.dta, clear
gen b_se_arima_asyl_ch = 2984.373
gen lb_arima_ch = arima_asyl_ch - 1.282*b_se_arima_asyl_ch
gen ub_arima_ch = arima_asyl_ch + 1.282*b_se_arima_asyl_ch
label variable arima_asyl_ch arima_asyl_ch

arima Germany, arima(1,1,1)
predict arima_asyl_de, dynamic(2020) y
save immigration_asyl2, replace
// Bootstrap the standard error of the mean
bootstrap r(mean), reps(1000) seed(123456) cluster(block) saving(bs_results, replace): summarize arima_asyl_de
// Load the bootstrap results
use bs_results, clear
// Calculate the mean and standard error from bootstrap results
egen bootstrap_mean = mean(_bs_1)
egen bootstrap_se = sd(_bs_1)
summ bootstrap_mean bootstrap_se
// Save the results back into the original dataset (I am unable to do this with code, sorry...)
use immigration_asyl2.dta, clear
gen b_se_arima_asyl_de = 89240.82
gen lb_arima_de = arima_asyl_de - 1.282*b_se_arima_asyl_de
gen ub_arima_de = arima_asyl_de + 1.282*b_se_arima_asyl_de
label variable arima_asyl_de arima_asyl_de

arima Spain, arima(1,1,1)
predict arima_asyl_es, dynamic(2020) y
save immigration_asyl3, replace
// Bootstrap the standard error of the mean
bootstrap r(mean), reps(1000) seed(123456) cluster(block) saving(bs_results, replace): summarize arima_asyl_es
// Load the bootstrap results
use bs_results, clear
// Calculate the mean and standard error from bootstrap results
egen bootstrap_mean = mean(_bs_1)
egen bootstrap_se = sd(_bs_1)
summ bootstrap_mean bootstrap_se
// Save the results back into the original dataset (I am unable to do this with code, sorry...)
use immigration_asyl3.dta, clear
gen b_se_arima_asyl_es = 21784.42
gen lb_arima_es = arima_asyl_es - 1.282*b_se_arima_asyl_es
gen ub_arima_es = arima_asyl_es + 1.282*b_se_arima_asyl_es
label variable arima_asyl_es arima_asyl_es

arima UK, arima(1,1,1)
predict arima_asyl_uk, dynamic(2020) y
save immigration_asyl4, replace
// Bootstrap the standard error of the mean
bootstrap r(mean), reps(1000) seed(123456) cluster(block) saving(bs_results, replace): summarize arima_asyl_uk
// Load the bootstrap results
use bs_results, clear
// Calculate the mean and standard error from bootstrap results
egen bootstrap_mean = mean(_bs_1)
egen bootstrap_se = sd(_bs_1)
summ bootstrap_mean bootstrap_se
// Save the results back into the original dataset (I am unable to do this with code, sorry...)
use immigration_asyl4.dta, clear
gen b_se_arima_asyl_uk = 2895.621
gen lb_arima_uk = arima_asyl_uk - 1.282*b_se_arima_asyl_uk
gen ub_arima_uk = arima_asyl_uk + 1.282*b_se_arima_asyl_uk
label variable arima_asyl_uk arima_asyl_uk

*CREATE ALTERNATIVE ARIMA MODELS

*create first and second differences
gen asyl_ch_1d=Switzerland-l.Switzerland
gen asyl_de_1d=Germany-l.Germany
gen asyl_es_1d=Spain-l.Spain
gen asyl_uk_1d=UK-l.UK
gen asyl_ch_2d=(Switzerland - l1.Switzerland) - (l1.Switzerland - l2.Switzerland)
gen asyl_de_2d=(Germany - l1.Germany) - (l1.Germany - l2.Germany)
gen asyl_es_2d=(Spain - l1.Spain) - (l1.Spain - l2.Spain)
gen asyl_uk_2d=(UK - l1.UK) - (l1.UK - l2.UK)
*Inspect corrolelograms
ac asyl_ch_1d
ac asyl_de_1d
ac asyl_es_1d
ac asyl_uk_1d
ac asyl_ch_2d
ac asyl_de_2d
ac asyl_es_2d
ac asyl_uk_2d
*The series are not autocorrelated with their lagged series. Hence, we stick to MA=1

*Check now for partial autocorrelation
pac asyl_ch_1d
pac asyl_de_1d
pac asyl_es_1d
pac asyl_uk_1d
pac asyl_ch_2d
pac asyl_de_2d
pac asyl_es_2d
pac asyl_uk_2d
*the series do not partially auto correlate with their lagged series at lags 1

*arima Switzerland, arima(2,1,1)
*predict arima_asyl_ch_211, dynamic(2020) y
*label variable arima_asyl_ch arima_asyl_ch
*arima Germany, arima(2,1,1)
*predict arima_asyl_de_211, dynamic(2020) y
*label variable arima_asyl_de arima_asyl_de
*arima Spain, arima(2,1,1)
*predict arima_asyl_es_211, dynamic(2020) y
*label variable arima_asyl_es arima_asyl_es
*arima UK, arima(2,1,1)
*predict arima_asyl_uk_211, dynamic(2020) y
*label variable arima_asyl_uk arima_asyl_uk

arima Switzerland, arima(1,2,1)
predict arima_asyl_ch_121, dynamic(2020) y
label variable arima_asyl_ch arima_asyl_ch
arima Germany, arima(1,2,1)
predict arima_asyl_de_121, dynamic(2020) y
label variable arima_asyl_de arima_asyl_de
arima Spain, arima(1,2,1)
predict arima_asyl_es_121, dynamic(2020) y
label variable arima_asyl_es arima_asyl_es
arima UK, arima(1,2,1)
predict arima_asyl_uk_121, dynamic(2020) y
label variable arima_asyl_uk arima_asyl_uk

arima Switzerland, arima(1,1,2)
predict arima_asyl_ch_112, dynamic(2020) y
label variable arima_asyl_ch arima_asyl_ch
arima Germany, arima(1,1,2)
predict arima_asyl_de_112, dynamic(2020) y
label variable arima_asyl_de arima_asyl_de
arima Spain, arima(1,1,2)
predict arima_asyl_es_112, dynamic(2020) y
label variable arima_asyl_es arima_asyl_es
arima UK, arima(1,1,2)
predict arima_asyl_uk_112, dynamic(2020) y
label variable arima_asyl_uk arima_asyl_uk

list arima_asyl_ch arima_asyl_de arima_asyl_es arima_asyl_es arima_asyl_ch_121 arima_asyl_de_121 arima_asyl_es_121 arima_asyl_uk_121 arima_asyl_ch_112 arima_asyl_de_112 arima_asyl_es_112 arima_asyl_uk_112 if year==2020

replace Switzerland=asyl_ch_2020 if year==2020
drop asyl_ch_2020
replace Germany=asyl_de_2020 if year==2020
drop asyl_de_2020
replace Spain=asyl_es_2020 if year==2020
drop asyl_es_2020
replace UK=asyl_uk_2020 if year==2020
drop asyl_uk_2020

gen drift_asylum_ch=drift_asyl_ch if year==2020
gen naive_asylum_ch=naive_asyl_ch if year==2020
gen smooth_asylum_ch=smooth_asyl_ch if year==2020
gen arima_asylum_ch=arima_asyl_ch if year==2020

gen drift_asylum_de=drift_asyl_de if year==2020
gen naive_asylum_de=naive_asyl_de if year==2020
gen smooth_asylum_de=smooth_asyl_de if year==2020
gen arima_asylum_de=arima_asyl_de if year==2020

gen drift_asylum_es=drift_asyl_es if year==2020
gen naive_asylum_es=naive_asyl_es if year==2020
gen smooth_asylum_es=smooth_asyl_es if year==2020
gen arima_asylum_es=arima_asyl_es if year==2020

gen drift_asylum_uk=drift_asyl_uk if year==2020
gen naive_asylum_uk=naive_asyl_uk if year==2020
gen smooth_asylum_uk=smooth_asyl_uk if year==2020
gen arima_asylum_uk=arima_asyl_uk if year==2020

*Market forecasts for May 31 (see ts analysis above)
gen pm_asylum_ch=12659.93 if year==2020
gen pm_asylum_ch_lb=10276.9 if year==2020
gen pm_asylum_ch_ub=15848.6 if year==2020
gen pm_asylum_de=135681.2 if year==2020
gen pm_asylum_de_lb=74482.09 if year==2020
gen pm_asylum_de_ub=168229.1 if year==2020
gen pm_asylum_es=75134.49 if year==2020
gen pm_asylum_es_lb=36907.43 if year==2020
gen pm_asylum_es_ub=103391 if year==2020      
gen pm_asylum_uk=34815.65 if year==2020
gen pm_asylum_uk_lb=24345.63 if year==2020
gen pm_asylum_uk_ub=41050.87 if year==2020

twoway (connected Switzerland year) (scatter drift_asylum_ch year) (scatter naive_asylum_ch year) (scatter smooth_asylum_ch year) (scatter arima_asylum_ch year) (scatter pm_asylum_ch year)
twoway (connected Germany year) (scatter drift_asylum_de year) (scatter naive_asylum_de year) (scatter smooth_asylum_de year) (scatter arima_asylum_de year) (scatter pm_asylum_de year)
twoway (connected Spain year) (scatter drift_asylum_es year) (scatter naive_asylum_es year) (scatter smooth_asylum_es year) (scatter arima_asylum_es year) (scatter pm_asylum_es year)
twoway (connected UK year) (scatter drift_asylum_uk year) (scatter naive_asylum_uk year) (scatter smooth_asylum_uk year) (scatter arima_asylum_uk year) (scatter pm_asylum_uk year)


***********Merge the datasets
clear
cd "YOUR_PATH"
use "Germany_immigration2020.dta", clear

append using "Spain_immigration2020.dta"
append using "Switzerland_immigration2020.dta"
append using "Germany_asylum2020.dta"
append using "Spain_asylum2020.dta"
append using "Switzerland_asylum2020.dta"
append using "UK_asylum2020.dta"

save "pm_tseries_migration2020.dta", replace

*Recodings

encode issue, gen(panel)
set obs `=_N+1' 
replace panel=1 if panel==.
set obs `=_N+1' 
replace panel=2 if panel==.
set obs `=_N+1' 
replace panel=3 if panel==.
set obs `=_N+1' 
replace panel=4 if panel==.
set obs `=_N+1' 
replace panel=5 if panel==.
set obs `=_N+1' 
replace panel=6 if panel==.
set obs `=_N+1' 
replace panel=7 if panel==.
set obs `=_N+1' 
replace panel=8 if panel==.
set obs `=_N+1' 
replace panel=9 if panel==.
set obs `=_N+1' 
replace panel=10 if panel==.
set obs `=_N+1' 
replace panel=11 if panel==.
set obs `=_N+1' 
replace panel=12 if panel==.
set obs `=_N+1' 
replace panel=13 if panel==.
set obs `=_N+1' 
replace panel=14 if panel==.
set obs `=_N+1' 
replace panel=15 if panel==.
set obs `=_N+1' 
replace panel=16 if panel==.
set obs `=_N+1' 
replace panel=17 if panel==.
set obs `=_N+1' 
replace panel=18 if panel==.
set obs `=_N+1' 
replace panel=19 if panel==.
set obs `=_N+1' 
replace panel=20 if panel==.
set obs `=_N+1' 
replace panel=21 if panel==.
set obs `=_N+1' 
replace panel=22 if panel==.

drop if daily<22066
drop if daily>22066

*Make sure Swiss data is on the same scale
replace p10=p10/1000 if country=="Switzerland" & issue=="asylum"
replace p50=p50/1000 if country=="Switzerland" & issue=="asylum"
replace p90=p90/1000 if country=="Switzerland" & issue=="asylum"

collapse (mean) p10 p50 p90 result2019 result2020 drift _smooth _smooth_lb _smooth_ub _arima _arima_lb _arima_ub, by(issue country)

***TABLE 1
gen nr = _n
tsset nr
fcstats result2020 p50 if issue=="asylum"
fcstats result2020 result2019 if issue=="asylum" 
fcstats result2020 drift if issue=="asylum" 
fcstats result2020 _smooth if issue=="asylum"
fcstats result2020 _arima if issue=="asylum"

fcstats result2020 p50 if issue=="immigration"
fcstats result2020 result2019 if issue=="immigration" 
fcstats result2020 drift if issue=="immigration" 
fcstats result2020 _smooth if issue=="immigration"
fcstats result2020 _arima if issue=="immigration"

*putting CH on different scale: asylum
replace p10=p10*100 if country=="Switzerland" & issue=="asylum"
replace p50=p50*100 if country=="Switzerland" & issue=="asylum"
replace p90=p90*100 if country=="Switzerland" & issue=="asylum"
replace result2019=result2019*100 if country=="Switzerland" & issue=="asylum"
replace result2020=result2020*100 if country=="Switzerland" & issue=="asylum"
replace drift=drift*100 if country=="Switzerland" & issue=="asylum"
replace _smooth=_smooth*100 if country=="Switzerland" & issue=="asylum"
replace _smooth_lb=_smooth_lb*100 if country=="Switzerland" & issue=="asylum"
replace _smooth_ub=_smooth_ub*100 if country=="Switzerland" & issue=="asylum"
replace _arima=_arima*100 if country=="Switzerland" & issue=="asylum"
replace _arima_lb=_arima_lb*100 if country=="Switzerland" & issue=="asylum"
replace _arima_ub=_arima_ub*100 if country=="Switzerland" & issue=="asylum"

*putting CH on different scale: immigration
replace p10=p10*10 if country=="Switzerland" & issue=="immigration"
replace p50=p50*10 if country=="Switzerland" & issue=="immigration"
replace p90=p90*10 if country=="Switzerland" & issue=="immigration"
replace result2019=result2019*10 if country=="Switzerland" & issue=="immigration"
replace result2020=result2020*10 if country=="Switzerland" & issue=="immigration"
replace drift=drift*10 if country=="Switzerland" & issue=="immigration"
replace _smooth=_smooth*10 if country=="Switzerland" & issue=="immigration"
replace _smooth_lb=_smooth_lb*10 if country=="Switzerland" & issue=="immigration"
replace _smooth_ub=_smooth_ub*10 if country=="Switzerland" & issue=="immigration"
replace _arima=_arima*10 if country=="Switzerland" & issue=="immigration"
replace _arima_lb=_arima_lb*10 if country=="Switzerland" & issue=="immigration"
replace _arima_ub=_arima_ub*10 if country=="Switzerland" & issue=="immigration"

label variable p10 "upper 80% bound"
label variable p90 "lower 80% bound"
label variable p50 "forecast"
label variable result2019 "Result 2019"
label variable result2020 "Result 2020"
label variable _arima_ub "upper 80% bound"
label variable _arima_lb "lower 80% bound"
label variable _smooth_ub "upper 80% bound"
label variable _smooth_lb "lower 80% bound"

*from wide to long
egen id=concat(country issue)
rename result2020 f0
rename p10 f1
rename p50 f2
rename p90 f3
rename result2019 f4
rename drift f5
rename _smooth f6
rename _smooth_lb f7
rename _smooth_ub f8
rename _arima f9
rename _arima_lb f10
rename _arima_ub f11
drop nr
reshape long f, i(id) j(nr)
gen type=.
replace type=1 if nr==0
replace type=2 if nr==1 | nr==2 | nr==3
replace type=3 if nr==4 | nr==5 | nr==6 | nr==7 | nr==8 | nr==9
replace type=4 if nr==6 | nr==7 | nr==8
replace type=5 if nr==9 | nr==10 | nr==11
save "pm_tseries_migration2020_part.dta", replace
*Asylum
drop if issue!="asylum"
drop id
egen id=concat(country type)
reshape wide f type country, i(id) j(nr) 
gen id2=country0 if country0!=""
replace id2=country1 if country1!=""
replace id2=country4 if country4!=""
replace id2=country6 if country6!=""
replace id2=country9 if country9!=""
*reshape long type, i(id2) j(pf)
gen method=1 if type0==1
replace method=2 if type2==2
replace method=3 if type4==3
replace method=4 if type6==4
replace method=5 if type9==5

*set scheme
set scheme tab1
set scheme white_tableau

***FIGURE 2
graph dot (asis) f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 f10 f11, over(method) by(id2)

*Immigrations
use "pm_tseries_migration2020_part.dta", clear
drop if issue!="immigration"
drop id
egen id=concat(country type)
reshape wide f type country, i(id) j(nr) 
gen id2=country0 if country0!=""
replace id2=country1 if country1!=""
replace id2=country4 if country4!=""
replace id2=country6 if country6!=""
replace id2=country9 if country9!=""
*reshape long type, i(id2) j(pf)
gen method=1 if type0==1
replace method=2 if type2==2
replace method=3 if type4==3
replace method=4 if type6==4
replace method=5 if type9==5
*set scheme
set scheme tab1
set scheme white_tableau

***FIGURE 3
graph dot (asis) f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 f10 f11, over(method) by(id2)


***
clear
cd "YOUR_PATH"
use "Germany_asgroups2020.dta", clear
append using "Spain_asgroups2020.dta"
append using "Switzerland_asgroups2020.dta"
append using "UK_asgroups2020.dta"

drop if daily<22067
drop if daily>22096

collapse (mean) Syria Irak Afghanistan Turkey Iran Syria_abs Irak_abs ///
Afghanistan_abs Turkey_abs Iran_abs Venezuela Colombia Honduras Nicaragua ///
ElSalvador Venezuela_abs Colombia_abs Honduras_abs Nicaragua_abs ElSalvador_abs ///
Eritrea Algeria Eritrea_abs Algeria_abs Pakistan Albania Pakistan_abs ///
Albania_abs, by(country)
rename Syria prob1
rename Syria_abs abs1
rename Irak prob2
rename Irak_abs abs2
rename Afghanistan prob3
rename Afghanistan_abs abs3
rename Turkey prob4
rename Turkey_abs abs4
rename Iran prob5
rename Iran_abs abs5
rename Venezuela prob6
rename Venezuela_abs abs6
rename Colombia prob7
rename Colombia_abs abs7
rename Honduras prob8
rename Honduras_abs abs8
rename Nicaragua prob9
rename Nicaragua_abs abs9
rename ElSalvador prob10
rename ElSalvador_abs abs10
rename Eritrea prob11
rename Eritrea_abs abs11
rename Algeria prob12
rename Algeria_abs abs12
rename Pakistan prob13
rename Pakistan_abs abs13
rename Albania prob14
rename Albania abs14
reshape long prob abs, i(country)

rename _j group
label define groupl 1 "Syria" 2 "Iraq" 3 "Afghanistan" 4 "Turkey" 5 "Iran" ///
6 "Venezuela" 7 "Colombia" 8 "Honduras" 9 "Nicaragua" 10 "El Salvador" ///
11 "Eritrea" 12 "Algeria" 13 "Pakistan" 14 "Albania" 
label values group groupl  

*Regression
encode country, gen(cntry)
reg prob abs i.cntry
est store m1
esttab m1

gen abs1000=abs/1000 if country=="Germany" | country=="Spain"
replace abs1000=abs/100 if country=="Switzerland" | country=="UK"

replace country="Great Britain" if country=="UK"

rename prob Probability
label variable Probability "Forecast: Probability of being largest in %"
label variable abs1000 "Absolute in 1000s"

***FIGURE 4
twoway (scatter Probability abs1000 if country=="Great Britain", ytitle("Probability in %") mlabel(group)) (scatter Probability abs1000 if country=="Switzerland", ytitle("Probability in %") mlabel(group)) (scatter Probability abs1000 if country=="Germany", ytitle("Probability in %") mlabel(group)) (scatter Probability abs1000 if country=="Spain", ytitle("Probability in %") mlabel(group)) (lfit Probability abs1000, ) (lfit Probability abs1000 if country=="Great Britain", ) (lfit Probability abs1000 if country=="Switzerland", ) (lfit Probability abs1000 if country=="Germany", ) (lfit Probability abs1000 if country=="Spain", )
twoway (scatter Probability abs1000, by(country) ytitle("Probability in %") mlabel(group)) (lfit Probability abs1000, by(country))




